# -------------------------------------------------------------------
# Purpose: Creates Table B53
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Preconditions ----
# Abort early unless both the analysis-data directory and the appendix output
# directory exist. `pdataanalysis` and `poutputappendix` are not defined in
# this script and must already be set by whatever sources it.
stopifnot(
  dir.exists(pdataanalysis),
  dir.exists(poutputappendix)
)


# Input data ----
# Restore the objects stored in the county-level .RData file into the current
# environment. The file is expected to provide `countyLevel19001940`, which the
# regressions below pass to feols() as their data.
county_data_file <- file.path(pdataanalysis, "countyLevel19001940.RData")
load(county_data_file)


# Regressions ----
# Six instrumental-variable estimations collected in the list `i`:
#   models 1-3: outcome sum_patents_pc_1900_f_w
#   models 4-6: outcome sum_break_p80_rrfsim05_pc_1900_f_w
# Within each outcome the set of distance rings grows: own county + <100 miles,
# then adding 100-200 miles, then adding 200-300 miles.
# Every model absorbs gisjoin_1900, statefip^year and the varying slope
# gisjoin_1900[year]; the entropy_* and sum_n_* regressors are instrumented by
# their iv_lo_* counterparts.
# NOTE(review): `gisjoin_1900[year]` is also listed on the instrument side of
# every formula -- confirm this is intended.
i <- list(
  # (1) patents: own county + <100 miles
  feols(
    sum_patents_pc_1900_f_w ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws +
        entropy_namelast_mp_adjp_100m_ws +
        sum_n_namelast_mp_adjp_ws +
        sum_n_namelast_mp_adjp_100m_ws ~
        iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws +
          gisjoin_1900[year],
    data = countyLevel19001940
  ),
  # (2) patents: adds the 100-200 mile ring
  feols(
    sum_patents_pc_1900_f_w ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws +
        entropy_namelast_mp_adjp_100m_ws +
        entropy_namelast_mp_adjp_100_200m_ws +
        sum_n_namelast_mp_adjp_ws +
        sum_n_namelast_mp_adjp_100m_ws +
        sum_n_namelast_mp_adjp_100_200m_ws ~
        iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws +
          gisjoin_1900[year],
    data = countyLevel19001940
  ),
  # (3) patents: adds the 200-300 mile ring
  feols(
    sum_patents_pc_1900_f_w ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws +
        entropy_namelast_mp_adjp_100m_ws +
        entropy_namelast_mp_adjp_100_200m_ws +
        entropy_namelast_mp_adjp_200_300m_ws +
        sum_n_namelast_mp_adjp_ws +
        sum_n_namelast_mp_adjp_100m_ws +
        sum_n_namelast_mp_adjp_100_200m_ws +
        sum_n_namelast_mp_adjp_200_300m_ws ~
        iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws +
          gisjoin_1900[year],
    data = countyLevel19001940
  ),
  # (4) sum_break_p80_rrfsim05: own county + <100 miles
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws +
        entropy_namelast_mp_adjp_100m_ws +
        sum_n_namelast_mp_adjp_ws +
        sum_n_namelast_mp_adjp_100m_ws ~
        iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws +
          gisjoin_1900[year],
    data = countyLevel19001940
  ),
  # (5) sum_break_p80_rrfsim05: adds the 100-200 mile ring
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws +
        entropy_namelast_mp_adjp_100m_ws +
        entropy_namelast_mp_adjp_100_200m_ws +
        sum_n_namelast_mp_adjp_ws +
        sum_n_namelast_mp_adjp_100m_ws +
        sum_n_namelast_mp_adjp_100_200m_ws ~
        iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws +
          gisjoin_1900[year],
    data = countyLevel19001940
  ),
  # (6) sum_break_p80_rrfsim05: adds the 200-300 mile ring
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws +
        entropy_namelast_mp_adjp_100m_ws +
        entropy_namelast_mp_adjp_100_200m_ws +
        entropy_namelast_mp_adjp_200_300m_ws +
        sum_n_namelast_mp_adjp_ws +
        sum_n_namelast_mp_adjp_100m_ws +
        sum_n_namelast_mp_adjp_100_200m_ws +
        sum_n_namelast_mp_adjp_200_300m_ws ~
        iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws +
          iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws +
          iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws +
          gisjoin_1900[year],
    data = countyLevel19001940
  )
)


# Variable labels ----
# Map variable names to the LaTeX labels that etable() prints. The dictionary
# is registered globally via setFixest_dict(), so it applies to both etable()
# calls further down.
table_labels <- c(
  # Endogenous regressors: surname diversity by distance ring
  entropy_namelast_mp_adjp_ws = "Surname diversity",
  entropy_namelast_mp_adjp_100m_ws = "Surname diversity ($<$ 100 miles)",
  entropy_namelast_mp_adjp_100_200m_ws = "Surname diversity (100 $<$ 200 miles)",
  entropy_namelast_mp_adjp_200_300m_ws = "Surname diversity (200 $<$ 300 miles)",
  # Instruments for surname diversity
  iv_lo_entropy_namelast_mp_adjp_fe_immig_ws = "Predicted surname diversity",
  iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws = "Predicted surname diversity ($<$ 100 miles)",
  iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws = "Predicted surname diversity (100 $<$ 200 miles)",
  iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws = "Predicted surname diversity (200 $<$ 300 miles)",
  # Endogenous regressors: population by distance ring
  sum_n_namelast_mp_adjp_ws = "Population",
  sum_n_namelast_mp_adjp_100m_ws = "Population ($<$ 100 miles)",
  sum_n_namelast_mp_adjp_100_200m_ws = "Population (100 $<$ 200 miles)",
  sum_n_namelast_mp_adjp_200_300m_ws = "Population (200 $<$ 300 miles)",
  # Instruments for population
  iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws = "Predicted population",
  iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws = "Predicted population ($<$ 100 miles)",
  iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws = "Predicted population (100 $<$ 200 miles)",
  iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws = "Predicted population (200 $<$ 300 miles)",
  # Fixed-effect and identifier labels
  year = "Period",
  statefip = "State",
  namelast_mp = "Surname",
  gisjoin_1900 = "County"
)
setFixest_dict(table_labels)

# Second-stage table ----
# Export all six models to tableB53.tex in AER style, with standard errors
# clustered by statefip, the number of observations as the only fit statistic,
# and coefficients whose label matches "diversity" listed first.
tablename <- file.path(poutputappendix, "tableB53.tex")
etable(
  i,
  tex = TRUE,
  style.tex = style.tex("aer"),
  file = tablename,
  replace = TRUE,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3",
  digits.stats = "r3",
  order = "diversity"
)

# Post-process the written file with the project's table helpers (defined
# outside this script). Presumably the first call relabels the varying-slope
# row and the second moves the "Observations" row next to \bottomrule --
# confirm against the helper definitions.
edit_table_content_fixed(tablename, "Period $\\times $ County", "County-specific linear trends")
move_table_row(tablename, "Observations", "bottomrule")

# Drop line 6 and lines 9-25 of the file by position. These indices are
# hard-coded, so they must be re-checked whenever the table layout changes.
tex_lines <- readLines(tablename)
lines_to_drop <- c(6, 9:25)
writeLines(tex_lines[-lines_to_drop], tablename)

# First-stage panel ----
# Export the first-stage estimates (stage = 1) of all six models to a scratch
# file, pull the estimate rows out of it, and append them to tableB53.tex as
# four sub-panels placed relative to a "midrule" line.
tempname <- file.path(poutputappendix, "temp.tex")
etable(i,
  cluster = ~statefip,
  stage = 1,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  order = c("diversity"),
  file = tempname, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
estimates_rows <- get_estimates_rows(tempname)

# Each index vector has six entries, matching the six columns of the final
# table (three per sub-panel heading). Presumably the numbers pick positions in
# the first-stage output and NA leaves a cell blank for specifications that do
# not include that distance ring -- confirm against collapse_stage1().
estimates_row1 <- collapse_stage1(estimates_rows, c(2, 6, 12, 3, 7, 13))
estimates_row2 <- collapse_stage1(estimates_rows, c(NA, 8, 14, NA, NA, 15))
estimates_row3 <- collapse_stage1(estimates_rows, c(4, 9, 16, 5, 10, 17))
estimates_row4 <- collapse_stage1(estimates_rows, c(NA, 11, 18, NA, NA, 19))
add_table_row(tablename, "midrule", c(
  "& \\multicolumn{3}{c}{Surname diversity} &  \\multicolumn{3}{c}{Surname diversity ($<$ 100 miles)}\\\\",
  "\\cmidrule(lr){2-4}  \\cmidrule(lr){5-7}",
  estimates_row1,
  "& \\multicolumn{3}{c}{Surname diversity (100 $<$ 200 miles)} &  \\multicolumn{3}{c}{Surname diversity (200 $<$ 300 miles)}\\\\",
  "\\cmidrule(lr){2-4}  \\cmidrule(lr){5-7}",
  estimates_row2,
  "& \\multicolumn{3}{c}{Population} &  \\multicolumn{3}{c}{Population ($<$ 100 miles)}\\\\",
  "\\cmidrule(lr){2-4}  \\cmidrule(lr){5-7}",
  estimates_row3,
  "& \\multicolumn{3}{c}{Population (100 $<$ 200 miles)} &  \\multicolumn{3}{c}{Population (200 $<$ 300 miles)}\\\\",
  "\\cmidrule(lr){2-4}  \\cmidrule(lr){5-7}",
  estimates_row4
))

# The scratch file is only an intermediate; remove it so that no stray
# temp.tex is left behind in the appendix output directory.
unlink(tempname)

# Report where the finished table was written.
cat(sprintf("Table B53 saved to: %s \n", tablename))